The dataset this week comes from Kaggle and contains all Scooby Doo episodes. More info about Scooby Doo can be found on ScoobyPedia.
pacman::p_load(
tidytuesdayR,
tidyverse,
lubridate,
magrittr,
glue,
janitor,
skimr,
here
)Get data and write to local file
# Fetch the TidyTuesday release for 2021 week 29, take its "scoobydoo"
# element and cache it as data.csv under the project root (here()).
write_csv2(
  pluck(tt_load(2021, week = 29), "scoobydoo"),
  here("data.csv")
)
Read data from local file (d_raw)
# Load the cached file with every column typed as character, so that type
# conversion can be done explicitly later on. Both "NA" and "NULL" are
# read as missing values.
d_raw <- here("data.csv") %>%
  read_csv2(
    col_types = cols(.default = col_character()),
    na = c("NA", "NULL")
  )
Create working copy (d)
d <- d_raw
d
glimpse(d)
## Rows: 603
## Columns: 75
## $ index <chr> "1", "2", "3", "4", "5", "6", "7", "8", "9", …
## $ series_name <chr> "Scooby Doo, Where Are You!", "Scooby Doo, Wh…
## $ network <chr> "CBS", "CBS", "CBS", "CBS", "CBS", "CBS", "CB…
## $ season <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ title <chr> "What a Night for a Knight", "A Clue for Scoo…
## $ imdb <chr> "8.1", "8.1", "8", "7.8", "7.5", "8.4", "7.6"…
## $ engagement <chr> "556", "479", "455", "426", "391", "384", "35…
## $ date_aired <chr> "1969-09-13", "1969-09-20", "1969-09-27", "19…
## $ run_time <chr> "21", "22", "21", "21", "21", "21", "21", "21…
## $ format <chr> "TV Series", "TV Series", "TV Series", "TV Se…
## $ monster_name <chr> "Black Knight", "Ghost of Cptn. Cuttler", "Ph…
## $ monster_gender <chr> "Male", "Male", "Male", "Male", "Female", "Ma…
## $ monster_type <chr> "Possessed Object", "Ghost", "Ghost", "Ancien…
## $ monster_subtype <chr> "Suit", "Suit", "Phantom", "Miner", "Witch Do…
## $ monster_species <chr> "Object", "Human", "Human", "Human", "Human",…
## $ monster_real <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ monster_amount <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ caught_fred <chr> "FALSE", "FALSE", "FALSE", "TRUE", "FALSE", "…
## $ caught_daphnie <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_velma <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_shaggy <chr> "TRUE", "TRUE", "FALSE", "FALSE", "FALSE", "F…
## $ caught_scooby <chr> "TRUE", "FALSE", "TRUE", "FALSE", "TRUE", "FA…
## $ captured_fred <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_daphnie <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_velma <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ captured_shaggy <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ captured_scooby <chr> "FALSE", "FALSE", "FALSE", "FALSE", "TRUE", "…
## $ unmask_fred <chr> "FALSE", "TRUE", "TRUE", "TRUE", "FALSE", "TR…
## $ unmask_daphnie <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_velma <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_shaggy <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_scooby <chr> "TRUE", "FALSE", "FALSE", "FALSE", "TRUE", "F…
## $ snack_fred <chr> "TRUE", "FALSE", "TRUE", "FALSE", "FALSE", "T…
## $ snack_daphnie <chr> "FALSE", "FALSE", "FALSE", "TRUE", "TRUE", "F…
## $ snack_velma <chr> "FALSE", "TRUE", "FALSE", "FALSE", "FALSE", "…
## $ snack_shaggy <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ snack_scooby <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ unmask_other <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_other <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ caught_not <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ trap_work_first <chr> NA, "FALSE", "FALSE", "TRUE", NA, "TRUE", "FA…
## $ setting_terrain <chr> "Urban", "Coast", "Island", "Cave", "Desert",…
## $ setting_country_state <chr> "United States", "United States", "United Sta…
## $ suspects_amount <chr> "2", "2", "0", "2", "1", "2", "1", "2", "1", …
## $ non_suspect <chr> "FALSE", "TRUE", "TRUE", "FALSE", "FALSE", "F…
## $ arrested <chr> "TRUE", "TRUE", "TRUE", "TRUE", "TRUE", "TRUE…
## $ culprit_name <chr> "Mr. Wickles", "Cptn. Cuttler", "Bluestone th…
## $ culprit_gender <chr> "Male", "Male", "Male", "Male", "Male", "Male…
## $ culprit_amount <chr> "1", "1", "1", "1", "1", "1", "1", "1", "1", …
## $ motive <chr> "Theft", "Theft", "Treasure", "Natural Resour…
## $ if_it_wasnt_for <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "thes…
## $ and_that <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "dog"…
## $ door_gag <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ number_of_snacks <chr> "2", "1", "3", "2", "2", "4", "4", "0", "1", …
## $ split_up <chr> "1", "0", "0", "1", "0", "0", "1", "0", "0", …
## $ another_mystery <chr> "1", "0", "0", "0", "1", "0", "0", "0", "0", …
## $ set_a_trap <chr> "0", "0", "0", "0", "0", "0", "1", "1", "0", …
## $ jeepers <chr> "0", "0", "0", "0", "0", "1", "0", "0", "0", …
## $ jinkies <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", …
## $ my_glasses <chr> "1", "0", "0", "0", "1", "0", "0", "1", "0", …
## $ just_about_wrapped_up <chr> "0", "0", "0", "0", "0", "0", "0", "0", "0", …
## $ zoinks <chr> "1", "3", "1", "2", "0", "2", "1", "0", "0", …
## $ groovy <chr> "0", "0", "2", "1", "0", "0", "1", "0", "0", …
## $ scooby_doo_where_are_you <chr> "0", "1", "0", "0", "1", "0", "0", "1", "0", …
## $ rooby_rooby_roo <chr> "1", "0", "0", "0", "0", "1", "1", "1", "1", …
## $ batman <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ scooby_dum <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ scrappy_doo <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ hex_girls <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ blue_falcon <chr> "FALSE", "FALSE", "FALSE", "FALSE", "FALSE", …
## $ fred_va <chr> "Frank Welker", "Frank Welker", "Frank Welker…
## $ daphnie_va <chr> "Stefanianna Christopherson", "Stefanianna Ch…
## $ velma_va <chr> "Nicole Jaffe", "Nicole Jaffe", "Nicole Jaffe…
## $ shaggy_va <chr> "Casey Kasem", "Casey Kasem", "Casey Kasem", …
## $ scooby_va <chr> "Don Messick", "Don Messick", "Don Messick", …
Names of columns that appear logical at first glance
# Logical candidates: ten individually named flags plus every column in
# the captured_/caught_/snack_/unmask_ families. The selection is routed
# through select() so the prefix helpers are expanded against d.
col_lgl <- names(select(
  d,
  "arrested", "batman", "blue_falcon", "door_gag", "hex_girls",
  "monster_real", "non_suspect", "scooby_dum", "scrappy_doo",
  "trap_work_first",
  starts_with("captured_"),
  starts_with("caught_"),
  starts_with("snack_"),
  starts_with("unmask_")
))
Names of columns that appear numeric at first glance
# Numeric candidates, listed alphabetically.
col_num <- c(
  "another_mystery", "culprit_amount", "engagement", "groovy", "imdb",
  "index", "jeepers", "jinkies", "just_about_wrapped_up",
  "monster_amount", "my_glasses", "number_of_snacks", "rooby_rooby_roo",
  "run_time", "scooby_doo_where_are_you", "season", "set_a_trap",
  "split_up", "suspects_amount", "zoinks"
)
Parse date from character vector date_aired
# Parse the year-month-day strings in date_aired into a Date column.
d <- mutate(d, aired_date = ymd(date_aired))
# Spot-check: show 15 random rows with the text and parsed date side by side.
d %>%
  slice_sample(n = 15) %>%
  select(date_aired, aired_date)
# The text column is no longer needed once the parsed one exists.
d <- select(d, -date_aired)
Extract date elements from aired_date
# get year and month (month as a labelled factor)
d %<>% mutate(
  aired_year = year(aired_date),
  aired_month = month(aired_date, label = TRUE)
)
# get decade, e.g. 1969 -> "1960s"
# The factor is built inside mutate() so that it applies to the new column;
# piping the result of mutate() into as_factor() passes the whole data
# frame to it and leaves aired_decade as plain text. factor() sorts its
# levels, which for four-digit decades is chronological order.
d %<>% mutate(
  aired_decade = factor(paste0(floor(aired_year / 10) * 10, "s"))
)
# verify result on 15 random rows
d %>%
  select(aired_date, aired_year, aired_month, aired_decade) %>%
  slice_sample(n = 15)
Show all unique values in the columns that are presumed to be logical to manually verify this assumption.
# Print every distinct value found across the presumed-logical columns.
# all_of() marks col_lgl as an external character vector of column names;
# passing the bare vector to select() is deprecated tidyselect usage.
d %>%
  select(all_of(col_lgl)) %>%
  unlist(use.names = FALSE) %>%
  unique()
## [1] "TRUE" "FALSE" NA
There are indeed no other values than "TRUE"/"FALSE" or NA.
# Convert the presumed-logical columns from text to logical.
# all_of() marks col_lgl as an external character vector of column names;
# passing the bare vector to across()/select() is deprecated tidyselect usage.
d %<>% mutate(across(all_of(col_lgl), as.logical))
# verify result on 15 random rows
d %>%
  select(all_of(col_lgl)) %>%
  slice_sample(n = 15)
Do the (presumed) numeric columns contain non-numeric characters?
# Store the unique values of every presumed-numeric column (one list
# element per column). all_of() marks col_num as an external character
# vector of column names.
uniq <- d %>%
  select(all_of(col_num)) %>%
  map(unique)
# Flag columns holding at least one value with a non-digit character.
# na.rm = TRUE keeps a missing value from turning the flag itself into NA.
non_num <- map_lgl(uniq, ~ any(str_detect(.x, "\\D"), na.rm = TRUE))
# print unique values in columns with non-numeric characters
uniq[non_num]
## $imdb
## [1] "8.1" "8" "7.8" "7.5" "8.4" "7.6" "8.2" "8.5" "8.7" "8.3" "7.7" "7.9"
## [13] "6.9" "7.4" "7.2" "7.3" "7" "6.3" "6.8" "7.1" "6.6" "6.4" "6" "6.7"
## [25] "6.5" "6.2" "6.1" "5.8" "5.1" "5" "5.9" "5.5" "5.3" "5.2" "5.4" "5.6"
## [37] "4.8" "4.6" "4.9" "8.8" "8.6" "9.2" "9.1" "8.9" "9" "9.3" "9.6" "4.2"
## [49] "5.7" NA
##
## $number_of_snacks
## [1] "2" "1" "3"
## [4] "4" "0" "1 box"
## [7] "a couple" "6" "several"
## [10] "3 boxes" "truck load" "5"
## [13] "2 boxes" "lifetime supply" NA
## [16] "8" "10" "wheel barrel full"
## [19] "several boxes"
##
## $season
## [1] "1" "2" "Crossover" "3" "Movie" "Special"
## [7] "4"
The only non-numeric character in imdb is the decimal point (.), so this column can safely be converted to numeric. season and number_of_snacks, on the other hand, contain genuine text values (e.g. "Crossover", "a couple"), so converting them to numeric doesn’t make sense.
# Final list of columns to convert: every numeric candidate except the two
# that hold free text (season and number of snacks).
col_num_final <- setdiff(col_num, c("season", "number_of_snacks"))
Keep the original columns (temporarily) for easy verification.
# Convert the selected columns to numeric and add the suffix "_num", so the
# converted values can be shown next to the original text.
# across() converts column by column and keeps the result a tibble.
d_num <- d %>%
  select(all_of(col_num_final)) %>%
  mutate(across(everything(), as.numeric)) %>%
  rename_with(~ paste0(.x, "_num"))
# bind converted data to original data
d %<>% bind_cols(d_num)
# sort columns alphabetically, which puts each x next to its x_num
d %<>% select(all_of(sort(colnames(.))))
# verify result on 15 random rows
d %>%
  select(all_of(sort(c(col_num_final, paste0(col_num_final, "_num"))))) %>%
  slice_sample(n = 15)
# Drop the text originals and strip the suffix from the converted columns.
# The pattern is anchored to the end of the name and applied only to
# columns ending in "_num", so no other column name can be altered.
d %<>%
  select(-all_of(col_num_final)) %>%
  rename_with(~ str_remove(.x, "_num$"), ends_with("_num"))
Identify character columns containing ≥1 comma (might indicate nested data)
# Keep the character columns, then show only those in which at least one
# value contains a comma. isTRUE() turns an undecided (NA) result, which
# any() gives for a column with missing values and no comma, into FALSE.
d_chr <- select(d, where(is.character))
d_chr[map_lgl(d_chr, ~ isTRUE(any(str_detect(.x, ","))))]
Create nested list columns where applicable
# Columns whose cells hold several comma-separated values.
col_nested <- c(
  "culprit_gender",
  "culprit_name",
  "monster_gender",
  "monster_name",
  "monster_species",
  "monster_type",
  "monster_subtype"
)
# Split every cell on commas, turning each column into a list column of
# character vectors. str_split() returns a list (one vector per cell), so
# the whitespace clean-up has to be mapped over its elements: str_squish()
# expects a character vector and does not clean the individual pieces when
# it is handed the list directly.
d %<>% mutate(
  across(
    all_of(col_nested),
    ~ map(str_split(.x, ","), str_squish)
  )
)
Rename some columns for consistency and easy sorting
# Voice-actor columns: move the "_va" suffix to the front as "va_".
d <- rename_with(
  d,
  function(nm) paste0("va_", str_remove(nm, "_va")),
  ends_with("_va")
)
# Guest-appearance flags: add the prefix "appears_".
d <- rename_with(
  d,
  function(nm) paste0("appears_", nm),
  c("batman", "scooby_dum", "scrappy_doo", "hex_girls", "blue_falcon")
)
# Counts of words/phrases: add the prefix "quote_".
d <- rename_with(
  d,
  function(nm) paste0("quote_", nm),
  c(
    "jeepers", "jinkies", "my_glasses", "just_about_wrapped_up",
    "zoinks", "groovy", "scooby_doo_where_are_you", "rooby_rooby_roo"
  )
)
# Order the columns alphabetically so related prefixes sit together.
d <- select(d, sort(colnames(d)))
Create tidy subsets of the data
# Reshape one family of columns sharing `prefix` into long form.
#
# data      data frame with an `index` column and the prefixed columns
# prefix    common column-name prefix, e.g. "captured_"
# names_to  name of the output column receiving the former column names
# values_to name of the output column receiving the cell values
# relabel   function applied to the former column names after the prefix
#           has been stripped (default: title case)
#
# Returns a tibble with the columns index, <names_to> and <values_to>.
pivot_family <- function(data, prefix, names_to, values_to,
                         relabel = str_to_title) {
  data %>%
    select(index, starts_with(prefix)) %>%
    pivot_longer(
      cols = starts_with(prefix),
      names_to = names_to,
      # names_prefix only strips the text at the start of the name
      names_prefix = prefix,
      values_to = values_to
    ) %>%
    mutate(across(all_of(names_to), relabel))
}

# Expand one list column into one row per element.
#
# data  data frame with an `index` column and the list column
# col   name of the list column, as a string
#
# Values are trimmed BEFORE empty strings are removed, so entries that
# consist only of whitespace are dropped as well. Missing values are
# dropped too. Returns a tibble with the columns index and <col>.
unnest_values <- function(data, col) {
  data %>%
    select(index, all_of(col)) %>%
    unnest(cols = all_of(col)) %>%
    mutate(across(all_of(col), str_trim)) %>%
    drop_na(all_of(col)) %>%
    filter(.data[[col]] != "")
}

d_captured <- pivot_family(d, "captured_", "character", "captured")
head(d_captured)

d_caught <- pivot_family(d, "caught_", "character", "caught")
head(d_caught)

d_unmask <- pivot_family(d, "unmask_", "character", "unmask")
head(d_unmask)

d_snack <- pivot_family(d, "snack_", "character", "snack")
head(d_snack)

# Quotes are labelled as sentences: underscores become spaces and only the
# first letter is capitalised.
d_quote <- pivot_family(
  d, "quote_", "quote", "n",
  relabel = function(x) str_to_sentence(str_replace_all(x, "_", " "))
)
head(d_quote)

d_voice <- pivot_family(d, "va_", "character", "voice")
head(d_voice)

# Monster types additionally get spelling variants merged.
d_monster_type <- unnest_values(d, "monster_type") %>%
  mutate(monster_type = recode(
    monster_type,
    Disugised = "Disguised",
    Disguise = "Disguised",
    `Possessed Object` = "Possessed"
  ))
head(d_monster_type)

d_monster_subtype <- unnest_values(d, "monster_subtype")
head(d_monster_subtype)

d_monster_species <- unnest_values(d, "monster_species")
head(d_monster_species)

d_monster_gender <- unnest_values(d, "monster_gender")
head(d_monster_gender)

d_culprit <- unnest_values(d, "culprit_gender")
head(d_culprit)
Main dataset after cleaning and wrangling.
d
skim(d)
| Name | d |
| Number of rows | 603 |
| Number of columns | 78 |
| _______________________ | |
| Column type frequency: | |
| character | 17 |
| Date | 1 |
| factor | 1 |
| list | 7 |
| logical | 33 |
| numeric | 19 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| aired_decade | 0 | 1.00 | 5 | 5 | 0 | 7 | 0 |
| and_that | 528 | 0.12 | 3 | 80 | 0 | 64 | 0 |
| format | 0 | 1.00 | 5 | 21 | 0 | 5 | 0 |
| if_it_wasnt_for | 414 | 0.31 | 3 | 116 | 0 | 107 | 0 |
| motive | 67 | 0.89 | 4 | 16 | 0 | 27 | 0 |
| network | 0 | 1.00 | 3 | 20 | 0 | 11 | 0 |
| number_of_snacks | 1 | 1.00 | 1 | 17 | 0 | 18 | 0 |
| season | 0 | 1.00 | 1 | 9 | 0 | 7 | 0 |
| series_name | 0 | 1.00 | 4 | 42 | 0 | 29 | 0 |
| setting_country_state | 0 | 1.00 | 4 | 16 | 0 | 79 | 0 |
| setting_terrain | 0 | 1.00 | 3 | 8 | 0 | 15 | 0 |
| title | 0 | 1.00 | 4 | 76 | 0 | 602 | 0 |
| va_daphnie | 165 | 0.73 | 11 | 26 | 0 | 9 | 0 |
| va_fred | 219 | 0.64 | 9 | 18 | 0 | 5 | 0 |
| va_scooby | 28 | 0.95 | 10 | 12 | 0 | 5 | 0 |
| va_shaggy | 32 | 0.95 | 10 | 15 | 0 | 7 | 0 |
| va_velma | 218 | 0.64 | 9 | 19 | 0 | 12 | 0 |
Variable type: Date
| skim_variable | n_missing | complete_rate | min | max | median | n_unique |
|---|---|---|---|---|---|---|
| aired_date | 0 | 1 | 1969-09-13 | 2021-02-25 | 1988-09-10 | 448 |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| aired_month | 0 | 1 | TRUE | 12 | okt: 158, sep: 105, nov: 96, dec: 56 |
Variable type: list
| skim_variable | n_missing | complete_rate | n_unique | min_length | max_length |
|---|---|---|---|---|---|
| culprit_gender | 163 | 0.73 | 23 | 1 | 11 |
| culprit_name | 163 | 0.73 | 433 | 1 | 11 |
| monster_gender | 87 | 0.86 | 43 | 1 | 19 |
| monster_name | 87 | 0.86 | 481 | 1 | 17 |
| monster_species | 87 | 0.86 | 194 | 1 | 19 |
| monster_subtype | 88 | 0.85 | 269 | 1 | 19 |
| monster_type | 87 | 0.86 | 132 | 1 | 19 |
Variable type: logical
| skim_variable | n_missing | complete_rate | mean | count |
|---|---|---|---|---|
| appears_batman | 0 | 1.00 | 0.01 | FAL: 599, TRU: 4 |
| appears_blue_falcon | 0 | 1.00 | 0.05 | FAL: 570, TRU: 33 |
| appears_hex_girls | 0 | 1.00 | 0.01 | FAL: 597, TRU: 6 |
| appears_scooby_dum | 0 | 1.00 | 0.03 | FAL: 586, TRU: 17 |
| appears_scrappy_doo | 0 | 1.00 | 0.27 | FAL: 438, TRU: 165 |
| arrested | 155 | 0.74 | 0.85 | TRU: 381, FAL: 67 |
| captured_daphnie | 165 | 0.73 | 0.21 | FAL: 347, TRU: 91 |
| captured_fred | 219 | 0.64 | 0.18 | FAL: 313, TRU: 71 |
| captured_scooby | 28 | 0.95 | 0.14 | FAL: 492, TRU: 83 |
| captured_shaggy | 32 | 0.95 | 0.15 | FAL: 486, TRU: 85 |
| captured_velma | 218 | 0.64 | 0.19 | FAL: 311, TRU: 74 |
| caught_daphnie | 165 | 0.73 | 0.07 | FAL: 409, TRU: 29 |
| caught_fred | 219 | 0.64 | 0.34 | FAL: 252, TRU: 132 |
| caught_not | 0 | 1.00 | 0.05 | FAL: 572, TRU: 31 |
| caught_other | 0 | 1.00 | 0.14 | FAL: 519, TRU: 84 |
| caught_scooby | 28 | 0.95 | 0.28 | FAL: 415, TRU: 160 |
| caught_shaggy | 32 | 0.95 | 0.13 | FAL: 494, TRU: 77 |
| caught_velma | 218 | 0.64 | 0.11 | FAL: 344, TRU: 41 |
| door_gag | 0 | 1.00 | 0.10 | FAL: 544, TRU: 59 |
| monster_real | 87 | 0.86 | 0.22 | FAL: 404, TRU: 112 |
| non_suspect | 160 | 0.73 | 0.10 | FAL: 397, TRU: 46 |
| snack_daphnie | 165 | 0.73 | 0.11 | FAL: 389, TRU: 49 |
| snack_fred | 219 | 0.64 | 0.05 | FAL: 366, TRU: 18 |
| snack_scooby | 27 | 0.96 | 0.02 | FAL: 564, TRU: 12 |
| snack_shaggy | 31 | 0.95 | 0.08 | FAL: 529, TRU: 43 |
| snack_velma | 218 | 0.64 | 0.08 | FAL: 356, TRU: 29 |
| trap_work_first | 354 | 0.41 | 0.50 | TRU: 125, FAL: 124 |
| unmask_daphnie | 165 | 0.73 | 0.08 | FAL: 401, TRU: 37 |
| unmask_fred | 219 | 0.64 | 0.27 | FAL: 282, TRU: 102 |
| unmask_other | 0 | 1.00 | 0.06 | FAL: 568, TRU: 35 |
| unmask_scooby | 28 | 0.95 | 0.04 | FAL: 552, TRU: 23 |
| unmask_shaggy | 32 | 0.95 | 0.02 | FAL: 558, TRU: 13 |
| unmask_velma | 218 | 0.64 | 0.24 | FAL: 291, TRU: 94 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| aired_year | 0 | 1.00 | 1994.05 | 16.99 | 1969.0 | 1979.0 | 1988.0 | 2011.5 | 2021.0 | ▇▇▁▅▇ |
| another_mystery | 219 | 0.64 | 0.18 | 0.41 | 0.0 | 0.0 | 0.0 | 0.0 | 3.0 | ▇▂▁▁▁ |
| culprit_amount | 0 | 1.00 | 1.04 | 1.07 | 0.0 | 0.0 | 1.0 | 1.0 | 11.0 | ▇▁▁▁▁ |
| engagement | 15 | 0.98 | 580.33 | 4807.92 | 7.0 | 27.0 | 54.5 | 128.2 | 100951.0 | ▇▁▁▁▁ |
| imdb | 15 | 0.98 | 7.28 | 0.73 | 4.2 | 6.9 | 7.3 | 7.7 | 9.6 | ▁▁▇▆▁ |
| index | 0 | 1.00 | 302.00 | 174.22 | 1.0 | 151.5 | 302.0 | 452.5 | 603.0 | ▇▇▇▇▇ |
| monster_amount | 0 | 1.00 | 1.75 | 2.29 | 0.0 | 1.0 | 1.0 | 2.0 | 19.0 | ▇▁▁▁▁ |
| quote_groovy | 32 | 0.95 | 0.06 | 0.62 | 0.0 | 0.0 | 0.0 | 0.0 | 14.0 | ▇▁▁▁▁ |
| quote_jeepers | 165 | 0.73 | 0.56 | 1.30 | 0.0 | 0.0 | 0.0 | 1.0 | 10.0 | ▇▁▁▁▁ |
| quote_jinkies | 218 | 0.64 | 1.31 | 1.92 | 0.0 | 0.0 | 1.0 | 2.0 | 13.0 | ▇▁▁▁▁ |
| quote_just_about_wrapped_up | 218 | 0.64 | 0.05 | 0.22 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | ▇▁▁▁▁ |
| quote_my_glasses | 218 | 0.64 | 0.12 | 0.34 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | ▇▁▁▁▁ |
| quote_rooby_rooby_roo | 28 | 0.95 | 0.72 | 0.79 | 0.0 | 0.0 | 1.0 | 1.0 | 7.0 | ▇▁▁▁▁ |
| quote_scooby_doo_where_are_you | 32 | 0.95 | 0.13 | 0.42 | 0.0 | 0.0 | 0.0 | 0.0 | 4.0 | ▇▁▁▁▁ |
| quote_zoinks | 32 | 0.95 | 2.15 | 2.73 | 0.0 | 0.0 | 1.0 | 3.0 | 26.0 | ▇▁▁▁▁ |
| run_time | 0 | 1.00 | 23.52 | 17.21 | 4.0 | 12.0 | 22.0 | 23.0 | 94.0 | ▇▃▁▁▁ |
| set_a_trap | 219 | 0.64 | 0.12 | 0.34 | 0.0 | 0.0 | 0.0 | 0.0 | 2.0 | ▇▁▁▁▁ |
| split_up | 219 | 0.64 | 0.34 | 0.55 | 0.0 | 0.0 | 0.0 | 1.0 | 2.0 | ▇▁▃▁▁ |
| suspects_amount | 0 | 1.00 | 2.85 | 2.62 | 0.0 | 1.0 | 3.0 | 4.0 | 20.0 | ▇▂▁▁▁ |
Number of episodes and movies by decade
# Count shows per decade and format, one column per format.
# The segmented series and theatrical movies are folded into their parent
# formats before counting.
d %>%
  mutate(format = recode(
    format,
    `TV Series (segmented)` = "TV Series",
    `Movie (Theatrical)` = "Movie"
  )) %>%
  # count() with both variables returns an ungrouped result
  count(Decade = aired_decade, format) %>%
  # values_fill puts 0 in every decade/format combination that has no
  # rows, whichever formats are present; no column names are hard-coded
  pivot_wider(
    names_from = format,
    values_from = n,
    values_fill = 0L
  )
IMDb rating by decade
# Summary statistics of the IMDb rating per decade. The `variable` column
# of the summary is dropped from the displayed table.
select(
  rstatix::get_summary_stats(group_by(d, aired_decade), imdb),
  -variable
)
Highest rated show
# All shows sharing the highest IMDb rating (missing ratings are ignored).
d %>%
  filter(imdb == max(imdb, na.rm = TRUE)) %>%
  select(date = aired_date, format, title, imdb)
Lowest rated show
# All shows sharing the lowest IMDb rating; the rating itself is shown as
# well, matching the highest-rated table.
d %>%
  filter(imdb == min(imdb, na.rm = TRUE)) %>%
  select(date = aired_date, format, title, imdb)

# Share of TRUE/FALSE outcomes per character, as row percentages.
#
# data      long table with a `character` column and a logical outcome
# flag_col  name of the logical outcome column, as a string
#
# Rows with a missing outcome are excluded. The result is sorted by the
# TRUE share, highest first, and formatted as percentages.
outcome_share <- function(data, flag_col) {
  data %>%
    select(character, outcome = all_of(flag_col)) %>%
    filter(!is.na(outcome)) %>%
    tabyl(character, outcome) %>%
    adorn_percentages(denominator = "row") %>%
    # sort while the shares are still numeric, i.e. before formatting
    arrange(desc(`TRUE`)) %>%
    adorn_pct_formatting() %>%
    select(character, `TRUE`, `FALSE`)
}

outcome_share(d_captured, "captured")
outcome_share(d_caught, "caught")
outcome_share(d_unmask, "unmask")
outcome_share(d_snack, "snack")

# Total number of times each quote was said, most frequent first.
d_quote %>%
  group_by(quote) %>%
  summarise(total = sum(n, na.rm = TRUE)) %>%
  arrange(desc(total))

# Number of rows per character and voice actor; count() with both
# variables returns an ungrouped result.
d_voice %>%
  drop_na(voice) %>%
  count(character, voice)

# Monster gender frequencies, most frequent first.
d_monster_gender %>%
  tabyl(monster_gender) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  rename(gender = monster_gender)
Top 10 of most frequent monster types
# Frequency table of monster types, limited to the ten highest counts
# (ties at the cut-off are kept), with the share shown as a percentage.
tabyl(d_monster_type, monster_type) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(type = monster_type)
Top 10 of most frequent monster subtypes
# Same table for monster subtypes.
tabyl(d_monster_subtype, monster_subtype) %>%
  slice_max(n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(subtype = monster_subtype)
Top 10 of most frequent monster species
# Frequency table of monster species, limited to the ten highest counts.
d_monster_species %>%
  tabyl(monster_species) %>%
  slice_max(order_by = n, n = 10) %>%
  adorn_pct_formatting() %>%
  rename(species = monster_species)

# Culprit gender frequencies, most frequent first.
d_culprit %>%
  tabyl(culprit_gender) %>%
  arrange(desc(n)) %>%
  adorn_pct_formatting() %>%
  rename(gender = culprit_gender)

# Ten most frequent motives (shows without a recorded motive excluded).
d %>%
  drop_na(motive) %>%
  tabyl(motive) %>%
  slice_max(order_by = n, n = 10) %>%
  adorn_pct_formatting()

# Share of male and female monsters per decade.
# Values are trimmed before filtering so that entries carrying surrounding
# whitespace still match "Male"/"Female", as in the culprit plot below.
ggplot(
  d %>%
    select(aired_decade, monster_gender) %>%
    unnest(monster_gender) %>%
    mutate(monster_gender = str_trim(monster_gender)) %>%
    filter(monster_gender %in% c("Male", "Female")) %>%
    count(aired_decade, monster_gender),
  aes(
    fill = as.factor(monster_gender),
    y = n,
    x = as.factor(aired_decade)
  )
) +
  labs(
    title = "Monsters gender",
    subtitle = "Male and female monsters by decade in Scooby Doo",
    x = "decade aired",
    y = ""
  ) +
  # geom_col() plots the precomputed counts; position = "fill" rescales
  # each bar to 100 %
  geom_col(position = "fill", linetype = "blank") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = c("Female" = "lightpink", "Male" = "skyblue"),
    name = "Gender"
  ) +
  hrbrthemes::theme_ipsum_rc()

# Share of culprit genders per decade, drawn as horizontal bars.
ggplot(
  d %>%
    select(aired_decade, culprit_gender) %>%
    unnest(cols = c(culprit_gender)) %>%
    drop_na(culprit_gender) %>%
    filter(culprit_gender != "") %>%
    mutate(culprit_gender = str_trim(culprit_gender)) %>%
    count(aired_decade, culprit_gender),
  aes(
    fill = fct_rev(as.factor(culprit_gender)),
    y = n,
    # reversed so that, after coord_flip(), the earliest decade is on top
    x = fct_rev(as.factor(aired_decade))
  )
) +
  labs(
    title = "Culprit gender",
    subtitle = "Male and female culprits by decade in Scooby Doo",
    x = "Decade aired",
    y = ""
  ) +
  ggchicklet::geom_chicklet(
    position = position_fill(),
    radius = grid::unit(5, "pt")
  ) +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_manual(
    values = c("Female" = "lightpink", "Male" = "skyblue"),
    name = "Gender"
  ) +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()

# Captured: number of episodes in which each character was captured,
# stacked by decade. Characters are ordered by their total count.
sort_order <- d_captured %>%
  filter(captured) %>%
  count(character) %>%
  arrange(n) %>%
  pull(character)
ggplot(
  d_captured %>%
    filter(captured) %>%
    # explicit key: avoids the "Joining, by" message and accidental
    # matching on any other shared column
    left_join(select(d, index, aired_decade), by = "index") %>%
    count(decade = aired_decade, character, name = "captured"),
  aes(
    x = fct_relevel(as.factor(character), sort_order),
    fill = as.factor(decade),
    y = captured
  )
) +
  ggchicklet::geom_chicklet(radius = grid::unit(5, "pt")) +
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  labs(
    x = "Character",
    y = "Cumulative count",
    title = "Captured",
    subtitle = "By character and decade",
    caption = "Created by Philomenix",
    fill = "Decade"
  ) +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()

# Caught: number of episodes in which each character caught the monster,
# stacked by decade. The "Not" and "Other" categories are left out.
sort_order <- d_caught %>%
  filter(caught & !(character %in% c("Not", "Other"))) %>%
  count(character) %>%
  arrange(n) %>%
  pull(character)
ggplot(
  d_caught %>%
    filter(caught) %>%
    filter(!(character %in% c("Not", "Other"))) %>%
    left_join(select(d, index, aired_decade), by = "index") %>%
    count(decade = aired_decade, character, name = "caught"),
  aes(
    x = fct_relevel(as.factor(character), sort_order),
    fill = as.factor(decade),
    y = caught
  )
) +
  ggchicklet::geom_chicklet(radius = grid::unit(5, "pt")) +
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  labs(
    x = "Character",
    y = "Cumulative count",
    title = "Caught",
    subtitle = "By character and decade",
    caption = "Created by Philomenix",
    fill = "Decade"
  ) +
  coord_flip() +
  hrbrthemes::theme_ipsum_rc()